Setup

Wet lab

These were cell suspensions of meningeal dura, enriched by MACS for cells expressing Cd45, Cd31, and Lyve-1. We sent libraries from ~5000 cells per sample to be sequenced at least 50,000 reads per cell. We might have ended up with more though, because we sequenced in the NovaSeq S2. We are expecting to see a lot of leukocytes and vascular cells. The purpose is to check the effect of sex, Apoe isoform expression, and age (and respective interactions) on the transcriptomes of the different cell subpopulations.

The meningeal layer of the dura mater is a durable, dense fibrous membrane that passes through the foramen magnum and is continuous with the dura mater of the spinal cord. The meningeal layer of the dura mater creates several dural folds that divide the cranial cavity into freely communicating spaces.

Flow markers:
- CD45/Ptprc/B220/Ly-5/Lyt-4/T200: transmembrane protein tyrosine phosphatase, located on most haematopoietic cells, positive selection of leukocytes
- CD31/Pecam1: adhesion and signaling receptor that is expressed on endothelial and hematopoietic cells, positive selection of endothelial cells
- Lyve1/1200012G08Rik/Lyve-1/Xlkd1/lymphatic vessel endothelial HA receptor-1: found primarily on lymphatic endothelial cells

Working directory

knitr::opts_knit$set(
  root.dir = "/research/labs/neurology/fryer/m214960/Da_Mesquita/scripts/R")

Libraries

# load packages
library(ComplexUpset) # upset()
library(dplyr)        # ungroup()
library(ggrepel)      # geom_text_repel()
library(ggtree)       # BuildClusterTree()
library(gtools)       # smartbind()
library(parallel)     # detectCores()
library(plotly)       # plot_ly()
library(UpSetR)       # fromList()
library(Seurat)       # Idents()
library(ShinyCell)    # createConfig()
#library(slingshot)   # slingshot()
#library(tradeSeq)
library(UpSetR)       # fromList()

# work in parallel
options(mc.cores = detectCores() - 1)

Save functions

saveToPDF <- function(...) {
    d = dev.copy(pdf,...)
    dev.off(d)
}

saveToPNG <- function(...) {
    d = dev.copy(png,...)
    dev.off(d)
}

Set variables

# variables
sample_order <- c("E3.2M.M","E3.2M.F","E4.2M.M","E4.2M.F",
                  "E3.14M.M","E3.14M.F","E4.14M.M","E4.14M.F")
age_order <- c("2 months","14 months")
sex_order <- c("Male","Female")
isoform_order <- c("E3","E4")
sample_colors <- c("gray","red","orange","yellow","green","blue","purple","pink")
age_colors <- c("darkgray","chartreuse3")
sex_colors <- c("darkgray","purple")
isoform_colors <- c("darkgray","cornflowerblue")

# set seed
set.seed(8)

# work in parallel
options(mc.cores = detectCores() - 1)

Read data

mouse.unannotated <- readRDS("../../rObjects/mouse_cellbender_unannotated.rds")
Idents(mouse.unannotated) <- "seurat_clusters"
DefaultAssay(mouse.unannotated) <- "RNA"
mouse.unannotated <- NormalizeData(mouse.unannotated)

mouse.unannotated
## An object of class Seurat 
## 39470 features across 35149 samples within 2 assays 
## Active assay: RNA (20251 features, 0 variable features)
##  1 other assay present: SCT
##  3 dimensional reductions calculated: pca, harmony, umap

Clustering QC before annotation

Unannotated UMAP

# UMAP with clusters
cluster_colors <- c("black","gray40","gray","red1","blue","magenta1","darkorange2",
                    "darkorange4","yellow1","yellow4","yellow2","green","lightgreen",
                    "chartreuse1","forestgreen","cyan","SteelBlue","red4","Aquamarine",
                    "purple1","purple4","orange","plum1","burlywood2","lightsalmon",
                    "sandybrown")
DimPlot(object = mouse.unannotated,
        reduction = "umap",
        repel = TRUE,
        cols = cluster_colors,
        label = TRUE)

# UMAP by isoform
DimPlot(object = mouse.unannotated,
        reduction = "umap",
        repel = TRUE,
        split.by = "isoform",
        cols = cluster_colors)

# UMAP by age
DimPlot(object = mouse.unannotated,
        reduction = "umap",
        repel = TRUE,
        split.by = "age",
        cols = cluster_colors)

# UMAP by sex
DimPlot(object = mouse.unannotated,
       reduction = "umap",
       repel = TRUE,
       split.by = "sex",
       cols = cluster_colors)

# UMAP by cell cycle phase
DimPlot(object = mouse.unannotated,
       reduction = "umap",
       repel = TRUE,
       split.by = "Phase",
       cols = cluster_colors)

Feature plots

# UMAP percent.mt
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "percent.mt") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP percent.ribo
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "percent.ribo.protein") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP percent.hb
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "percent.hb") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP nCount
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "nCount_RNA") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP nFeature
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "nFeature_RNA") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP cell.complexity
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "cell.complexity") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

# UMAP Ttr expression
FeaturePlot(mouse.unannotated,
            reduction = "umap", 
            features = "Ttr") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))

Cells per cluster

# Cells per sample per cluster
sample_ncells <- FetchData(mouse.unannotated, 
                     vars = c("ident", "sample")) %>%
  dplyr::count(ident,sample) %>%
  tidyr::spread(ident, n)
sample_ncells
##     sample   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16
## 1  E3.2M.M 300 612 370 249 366 239 201 140 220 247  98 149 110 111  58 124 143
## 2  E3.2M.F 458 693 160 296 260 261 249 180 170 298 182 220 132 142 100 121 115
## 3  E4.2M.M 341 277 289 163 197 166 205 290 135 137 103  81 103 123  77  43  50
## 4  E4.2M.F 406 379 273 269 313 269 296 157 124 119 144  73 136 122 101  70  81
## 5 E3.14M.M 490 335 295 312 275 249 274 327 282 206 216 113 184 199  96  85  93
## 6 E3.14M.F 233 368 300 363 335 294 157 355 173 183 152 154  62  11  77 105  96
## 7 E4.14M.M 433 233 369 278 256 250 281 329 310 189 167  92 120 144  94 114  90
## 8 E4.14M.F 691 447 587 527 431 562 355 121 456 294 300 200 216 118 362 192 181
##    17  18 19  20  21  22 23 24
## 1 177  41 56  60  43  31 26 25
## 2 112  75 75  85 114 102 18 14
## 3  87  54 59  40  48  86 25 22
## 4 118  61 53  33  31  58 37 31
## 5  97 113 97  66  88  53 45 29
## 6  54 103 64  58  28  11 13 13
## 7  52 126 81  72  70  61 50 28
## 8  62 159 85 128  85  50 52 35
# Cells per isoform per cluster
isoform_ncells <- FetchData(mouse.unannotated, 
                     vars = c("ident", "isoform")) %>%
  dplyr::count(ident,isoform) %>%
  tidyr::spread(ident, n)
isoform_ncells
##   isoform    0    1    2    3    4    5    6    7    8   9  10  11  12  13  14
## 1      E3 1481 2008 1125 1220 1236 1043  881 1002  845 934 648 636 488 463 331
## 2      E4 1871 1336 1518 1237 1197 1247 1137  897 1025 739 714 446 575 507 634
##    15  16  17  18  19  20  21  22  23  24
## 1 435 447 440 332 292 269 273 197 102  81
## 2 419 402 319 400 278 273 234 255 164 116
# Cells per sex per cluster
sex_ncells <- FetchData(mouse.unannotated, 
                     vars = c("ident", "sex")) %>%
  dplyr::count(ident,sex) %>%
  tidyr::spread(ident, n)
sex_ncells
##      sex    0    1    2    3    4    5    6    7   8   9  10  11  12  13  14
## 1   Male 1564 1457 1323 1002 1094  904  961 1086 947 779 584 435 517 577 325
## 2 Female 1788 1887 1320 1455 1339 1386 1057  813 923 894 778 647 546 393 640
##    15  16  17  18  19  20  21  22  23  24
## 1 366 376 413 334 293 238 249 231 146 104
## 2 488 473 346 398 277 304 258 221 120  93
# Cells per age per cluster
age_ncells <- FetchData(mouse.unannotated, 
                     vars = c("ident", "age")) %>%
  dplyr::count(ident,age) %>%
  tidyr::spread(ident, n)
age_ncells
##         age    0    1    2    3    4    5    6    7    8   9  10  11  12  13
## 1  2 months 1505 1961 1092  977 1136  935  951  767  649 801 527 523 481 498
## 2 14 months 1847 1383 1551 1480 1297 1355 1067 1132 1221 872 835 559 582 472
##    14  15  16  17  18  19  20  21  22  23  24
## 1 336 358 389 494 231 243 218 236 277 106  92
## 2 629 496 460 265 501 327 324 271 175 160 105

Gene histogram

# User params
goi <- "Malat1"
threshold <- 1

# Subset data
log2.threshold <- log2(threshold + 0.01)
counts.df <- FetchData(mouse.unannotated, vars = goi)
colnames(counts.df) <- "counts"
log2.counts.df <- log2(counts.df + 0.01)

# Histogram
title <- paste0(goi, "\nnCount_RNA > ", threshold)
hist1 <- ggplot(counts.df, aes(x = counts)) + 
  geom_histogram(bins = 100, fill = "gray", color = "black") + 
  labs(title = title, x=NULL, y=NULL) +
  xlab(paste0(goi, " nCount_RNA")) + ylab("# of Samples") + theme_bw() +
  geom_vline(xintercept = threshold, col = "blue") +
  annotate("rect", 
           xmin = -Inf,
           xmax = threshold, 
           ymin = 0, 
           ymax=Inf, 
           alpha=0.2, 
           fill="chocolate4") +
  annotate("rect", 
           xmin = threshold,
           xmax = Inf, 
           ymin = 0, 
           ymax=Inf, 
           alpha=0.2, 
           fill="deepskyblue")

# Histogram log transformed
hist2 <- ggplot(log2.counts.df, aes(x = counts)) + 
  geom_histogram(bins = 100, fill = "gray", color = "black") + 
  labs(title = title, x=NULL, y=NULL) +
  xlab(paste0("Log2(",goi, " nCount_RNA)")) + ylab("# of Samples") + theme_bw() +
  geom_vline(xintercept = log2.threshold, col = "blue") +
  annotate("rect", 
           xmin = -Inf,
           xmax = log2.threshold, 
           ymin = 0, 
           ymax=Inf, 
           alpha=0.2, 
           fill="chocolate4") +
  annotate("rect", 
           xmin = log2.threshold,
           xmax = Inf, 
           ymin = 0, 
           ymax=Inf, 
           alpha=0.2, 
           fill="deepskyblue")

# plot
plots1 <- list(hist1,hist2)
layout1 <- rbind(c(1),c(2))
grid1 <- grid.arrange(grobs = plots1, layout_matrix = layout1)

Percent gene

# user define variable
goi <- "Malat1"

# Extract counts data
DefaultAssay(mouse.unannotated) <- "RNA"
Idents(mouse.unannotated) <- "SCT_snn_res.0.5"
geneData <- FetchData(mouse.unannotated,
                      vars = goi,
                      slot = "counts")
geneData <- geneData > 1
table(geneData)
## geneData
## FALSE  TRUE 
##  3645 31504
mouse.unannotated$Expression <- geneData

FetchData(mouse.unannotated,
          vars = c("ident", "Expression")) %>%
  dplyr::count(ident, Expression) %>%
  tidyr::spread(ident, n)
##   Expression    0    1    2    3    4    5    6    7    8    9   10   11  12
## 1      FALSE   36   29 1569  172   61  818  469    9    1    6   56    3 116
## 2       TRUE 3316 3315 1074 2285 2372 1472 1549 1890 1869 1667 1306 1079 947
##    13  14  15  16  17  18  19  20  21  22  23  24
## 1  NA 200  NA   2   1  55   4  24  10   3  NA   1
## 2 970 765 854 847 758 677 566 518 497 449 266 196
# Plot
mouse.unannotated@meta.data %>%
  group_by(SCT_snn_res.0.5, Expression) %>%
  dplyr::count() %>%
  group_by(SCT_snn_res.0.5) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=SCT_snn_res.0.5,y=percent, fill=Expression)) +
  geom_col() +
  ggtitle(paste0("Percentage of cells with > 1 counts for ", goi)) +
  theme(axis.text.x = element_text(angle = 90))

Cluster tree

  • Cluster trees are helpful in deciding what clusters to merge.
mouse.unannotated <- BuildClusterTree(object = mouse.unannotated,
                                     dims = 1:21, # min.pc in processing script
                                     reorder = FALSE,
                                     reorder.numeric = FALSE)
tree <- mouse.unannotated@tools$BuildClusterTree
tree$tip.label <- paste0(tree$tip.label)

ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = cluster_colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))

Potential Markers

Highly variable PCs

# Printing out the most variable genes driving PCs
print(x = mouse.unannotated[["harmony"]], 
      dims = 1:10, 
      nfeatures = 10)
## harmony_ 1 
## Positive:  Lyz2, Cd74, C1qa, C1qb, Mrc1, Apoe, H2-Aa, H2-Ab1, C1qc, H2-Eb1 
## Negative:  Ptprb, Flt1, Mgp, Plpp3, Igfbp7, Kdr, Sparc, Col4a1, Plvap, Podxl 
## harmony_ 2 
## Positive:  Col1a2, Igfbp5, Col1a1, Mgp, Col12a1, Slc38a2, Cdh11, Ogn, Ecrg4, Gas1 
## Negative:  Flt1, Ptprb, Kdr, Plvap, Podxl, Igfbp3, Pecam1, Egfl7, Emcn, Ly6c1 
## harmony_ 3 
## Positive:  C1qa, Apoe, Mrc1, C1qb, C1qc, Pf4, Selenop, Stab1, Ctsb, Dab2 
## Negative:  S100a9, S100a8, Cd52, S100a6, Hp, Hmgb2, Lcn2, Pglyrp1, S100a11, Retnlg 
## harmony_ 4 
## Positive:  S100a9, S100a8, Lyz2, Mmp9, Hp, Lcn2, Retnlg, Pglyrp1, Mmp8, Wfdc21 
## Negative:  Cd79a, Cd74, Cd79b, Ly6d, Ms4a1, Ptprcap, Vpreb3, Siglecg, Pax5, Spib 
## harmony_ 5 
## Positive:  Cd74, H2-Ab1, H2-Aa, H2-Eb1, Ccr2, Mpeg1, Cd52, S100a4, Ctss, Crip1 
## Negative:  Stab1, Mrc1, Cpa3, Cma1, Tpsb2, Mcpt4, Il1rl1, Serpinb1a, Mrgprb1, Cd163 
## harmony_ 6 
## Positive:  Cd79a, Cd79b, Ly6d, Ms4a1, Vpreb3, Pax5, Plpp3, Cd24a, Spib, Siglecg 
## Negative:  Crip1, Myh11, Rgs5, Acta2, Myl9, Tagln, Ccr2, Notch3, S100a4, Vim 
## harmony_ 7 
## Positive:  Il1rl1, Kit, Cpa3, Tpsb2, Cma1, Mcpt4, Mrgprb1, Slc18a2, Ms4a2, Cd200r3 
## Negative:  Myl9, Rgs5, Myh11, Notch3, Acta2, Tagln, Tpm2, Cd79a, Ebf1, Igfbp7 
## harmony_ 8 
## Positive:  Mki67, Top2a, Stmn1, Hist1h1b, Knl1, Kif11, H2afz, Hmgb2, Pclaf, Prc1 
## Negative:  Ms4a1, Ly6d, Ltb, Cd37, Cd52, Ikzf3, Ptprc, Cd79a, Cd79b, Cd74 
## harmony_ 9 
## Positive:  Vwf, Cfh, Slco1a4, Cldn5, Clec14a, Slco2a1, Adgrg6, Mal, Ptn, Lrg1 
## Negative:  Igfbp3, Igfbp7, Cpa3, Tpsb2, Cma1, Mcpt4, Col4a1, Mrgprb1, Kit, Serpinb1a 
## harmony_ 10 
## Positive:  Vwf, Cd74, H2-Ab1, H2-Aa, H2-Eb1, Cfh, Cd79a, Slco1a4, Ly6d, Cldn5 
## Negative:  Ms4a4b, Gimap3, Il7r, Trbc2, Skap1, Cd3g, Bcl11b, Cd3e, Ccl5, Trbc1

Top 100 variable genes

# print top variable genes
DefaultAssay(mouse.unannotated) <- "SCT"
VariableFeatures(mouse.unannotated)[1:100]
##   [1] "S100a9"    "S100a8"    "Igfbp5"    "Mgp"       "Cd74"      "Ngp"      
##   [7] "Retnlg"    "Rgs5"      "Ltf"       "Lcn2"      "Cpa3"      "Camp"     
##  [13] "Tpsb2"     "Cma1"      "Mcpt4"     "Igfbp3"    "Mmp8"      "Col1a2"   
##  [19] "H2-Ab1"    "Igf2"      "Wfdc21"    "Dcn"       "Ccl8"      "Ly6d"     
##  [25] "Chil3"     "H2-Eb1"    "Lyz2"      "Vpreb3"    "Ms4a1"     "H2-Aa"    
##  [31] "Vwf"       "Cd79a"     "Ccl5"      "Myh11"     "Acta2"     "Col1a1"   
##  [37] "Mki67"     "Mrgprb1"   "Fabp4"     "Tagln"     "Gpx3"      "Col12a1"  
##  [43] "H19"       "Kit"       "Lum"       "Kcna1"     "Ogn"       "Cxcl12"   
##  [49] "Cldn5"     "Apod"      "Flt1"      "Mrc1"      "Top2a"     "Hist1h1b" 
##  [55] "Slc38a2"   "Apoe"      "Cdh11"     "S100a6"    "Hmgb2"     "Ecrg4"    
##  [61] "Tpm2"      "S100a4"    "Ly6c2"     "Ptprb"     "Mpz"       "Comp"     
##  [67] "C1qa"      "Plac8"     "Pf4"       "Cd79b"     "Cxcl9"     "Fn1"      
##  [73] "Ifitm6"    "Plp1"      "Col3a1"    "Il1rl1"    "Slpi"      "Ccl12"    
##  [79] "Myl9"      "Ptgds"     "Kdr"       "Hist1h3c"  "Ccr2"      "Pglyrp1"  
##  [85] "Mfap4"     "Hist1h2ae" "Mmp9"      "Fmod"      "Cd209f"    "Il1b"     
##  [91] "C1qb"      "Mbp"       "Vtn"       "Slco1a4"   "Snhg11"    "Scn7a"    
##  [97] "Mmrn1"     "Penk"      "Gzma"      "Gas1"
DefaultAssay(mouse.unannotated) <- "RNA"

Flow markers

  • Ptprc - located on most haematopoietic cells, positive selection of leukocytes
  • Pecam1 - adhesion and signaling receptor that is expressed on endothelial and hematopoietic cells, positive selection of endothelial cells
  • Lyve1 - found primarily on lymphatic endothelial cells
VlnPlot(mouse.unannotated,
        features = "Ptprc",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.

VlnPlot(mouse.unannotated,
        features = "Lyve1",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Pecam1",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

B-cell

  • Cd19: expressed in B-cells and follicular dendritic cells
  • Fcrla: a B-cell specific protein in mice
  • Cd79a & Cd79b: together form BCR complex
  • Ms4a1: aka Cd20, expressed beginning at pro-B phase and progressively increases in concentration until maturity
  • Clusters 11, 20, and 21 are B cell clusters
VlnPlot(mouse.unannotated,
        features = "Cd19",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Fcrla",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd79a",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd79b",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Ms4a1",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

Border Associated Macrophages

VlnPlot(mouse.unannotated,
        features = "Mrc1",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Ms4a7",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Pf4",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Lyve1",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

T-cell

  • Trac/Cd3d/Cd3e/Cd3g components of the TCR
  • Clusters 7 and 24 are T cells
VlnPlot(mouse.unannotated,
        features = "Trac",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd3d",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd3e",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd3g",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd8a",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd4",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

Endothelial cells

VlnPlot(mouse.unannotated,
        features = "Pecam1",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cdh5",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Kdr",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Flt1",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Plvap",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

Fibroblasts

  • Clusters 1, 12, 14, and 23 are fibroblasts
VlnPlot(mouse.unannotated,
        features = "Col1a1",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Col1a2",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Dcn",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Ogn",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Gas1",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

Mast cells

  • Cluster 15 is mast cells and potentially cluster 25
VlnPlot(mouse.unannotated,
        features = "Fcer1a",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Kit", # aka Cd117
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

Macrophage

  • 0,4,6,7,8,9,18,19,22,23,25
VlnPlot(mouse.unannotated,
        features = "Itgam", # aka Cd11b
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd14",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Cd68",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Ccr5",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

Neutrophils

  • Cluster 8 and 28 are neutrophils
VlnPlot(mouse.unannotated,
        features = "Ly6g",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Itgam", # aka Cd11b
        cols = cluster_colors)

VlnPlot(mouse.unannotated,
        features = "Cxcr4",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

Sandro’s markers

Idents(mouse.unannotated) <- "SCT_snn_res.0.5"
goi1 <- c("Cd3e","Trbc1","Cd4","Cd8a","Foxp3","Tbx21","Gata3","Thy1",
         "Cd19","Ms4a1","Cd27","Ighg1","Ptprc","Ly6g","Itgam","Aif1","Klrb1")
goi2 <- c("Adgre1","Ms4a3","Ly6c2","Mrc1","Lyz2","Cd74","Cd83","Cd14","H2-Aa",
          "H2-Ab1","Sirpa","Xcr1","Siglech","Itgax","Kit","Mcpt4")
goi3 <- c("Pdgfra","Col1a1","Lum","Pdgfrb","Rgs5","Cspg4","Acta2","Tagln",
          "Pecam1","Cd34","Plvap","Stmn2","Slc38a5","Vwf","Mfsd2a","Cldn5")
goi4 <- c("Prox1","Flt4","Pdpn","Lyve1","Sox10","Mbp","Fgf13","Kcnab2","Tubb3",
          "Slc17a6","Shank2","Erbb4","Park7","Kif5b","Slc4a1","Hmbs")
goi5 <- c("Pecam1","Flt4","Itgam","Mrc1","Cd3e","Gata3","Cd19","Ly6g","Pdgfrb",
          "Kit","Col1a1","Pmp22","Hmbs")

v1 <- VlnPlot(mouse.unannotated,
              features = goi1,
              cols = cluster_colors,
              split.by = "SCT_snn_res.0.5",
              flip = TRUE,
              stack = TRUE)
## Warning in FetchData.Seurat(object = object, vars = features, slot = slot): The
## following requested variables were not found: Ighg1
v1

v2 <- VlnPlot(mouse.unannotated,
              features = goi2,
              cols = cluster_colors,
              split.by = "SCT_snn_res.0.5",
              flip = TRUE,
              stack = TRUE)
v2

v3 <- VlnPlot(mouse.unannotated,
              features = goi3,
              cols = cluster_colors,
              split.by = "SCT_snn_res.0.5",
              flip = TRUE,
              stack = TRUE)
v3

v4 <- VlnPlot(mouse.unannotated,
              features = goi4,
              cols = cluster_colors,
              split.by = "SCT_snn_res.0.5",
              flip = TRUE,
              stack = TRUE)
v4

v5 <- VlnPlot(mouse.unannotated,
              features = goi5,
              cols = cluster_colors,
              split.by = "SCT_snn_res.0.5",
              flip = TRUE,
              stack = TRUE)
v5

Automatically detect markers

# work in parallel
options(mc.cores = detectCores() - 1)

# Find markers for each cluster
# Compares single cluster vs all other clusters
# Default is logfc.threshold = 0.25, min.pct = 0.5
Idents(mouse.unannotated) <- "SCT_snn_res.0.5"
all.markers <- FindAllMarkers(object = mouse.unannotated,
                              assay = "RNA",
                              test.use = "MAST",
                              verbose = TRUE)

# add column
all.markers$delta_pct <- abs(all.markers$pct.1 - all.markers$pct.2)

# rename columns and rows
rownames(all.markers) <- 1:nrow(all.markers)
all.markers <- all.markers[,c(6,7,1,5,2:4,8)]
colnames(all.markers)[c(6,7)] <- c("pct_1","pct_2")

# save
saveRDS(all.markers, "../../rObjects/mouse_cellbender_all_markers.rds")
# more stringent filtering
all.markers <- all.markers[all.markers$p_val_adj < 0.01,]

# compare 
table(all.markers$cluster)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
## 2786 2260 2717 3378 2038 2667 2427 4312 2027 2350 2609 2737 2780 3817 2750 4084 
##   16   17   18   19   20   21   22   23   24 
## 3189 2783 2637 2717 2743 3922 4065 1983  915
# subset
cluster0 <- all.markers[all.markers$cluster == 0,]
cluster1 <- all.markers[all.markers$cluster == 1,]
cluster2 <- all.markers[all.markers$cluster == 2,]
cluster3 <- all.markers[all.markers$cluster == 3,]
cluster4 <- all.markers[all.markers$cluster == 4,]
cluster5 <- all.markers[all.markers$cluster == 5,]
cluster6 <- all.markers[all.markers$cluster == 6,]
cluster7 <- all.markers[all.markers$cluster == 7,]
cluster8 <- all.markers[all.markers$cluster == 8,]
cluster9 <- all.markers[all.markers$cluster == 9,]
cluster10 <- all.markers[all.markers$cluster == 10,]
cluster11 <- all.markers[all.markers$cluster == 11,]
cluster12 <- all.markers[all.markers$cluster == 12,]
cluster13 <- all.markers[all.markers$cluster == 13,]
cluster14 <- all.markers[all.markers$cluster == 14,]
cluster15 <- all.markers[all.markers$cluster == 15,]
cluster16 <- all.markers[all.markers$cluster == 16,]
cluster17 <- all.markers[all.markers$cluster == 17,]
cluster18 <- all.markers[all.markers$cluster == 18,]
cluster19 <- all.markers[all.markers$cluster == 19,]
cluster20 <- all.markers[all.markers$cluster == 20,]
cluster21 <- all.markers[all.markers$cluster == 21,]
cluster22 <- all.markers[all.markers$cluster == 22,]
cluster23 <- all.markers[all.markers$cluster == 23,]
cluster24 <- all.markers[all.markers$cluster == 24,]

# save
write.table(all.markers,
            "../../results/markers/putative_cluster_markers_pvaladj_0.01.tsv", 
            sep = "\t", quote = FALSE, row.names = FALSE)

Cluster Annotations

Cluster 0: Kdr-high BECs & LECs 1

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster0$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster0$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 1: Mrc1+H2-Eb1-low macrophages 1

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster1$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster1$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 2: Mrc1+H2-Eb1-low macrophages 2

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster2$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster2$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 3: Fibroblasts 1

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster3$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster3$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 4: Mrc1+H2-Eb1-high macrophages 1

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster4$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster4$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 5: Fibroblasts 2

Cluster 6: Kdr-high BECs & LECs 2

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster6$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster6$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 7: Neutrophils

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster7$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster7$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 8: T cells & ILCs 1

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster8$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster8$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 9: Dendritic cells

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster9$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster9$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 10: Vwf+ BECs & LECs

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster10$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster10$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 11: Mrc1+H2-Eb1-high macrophages 2

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster11$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster11$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 12: Pericytes & SMCs

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster12$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster12$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 13: Mature B cells 1

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster13$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster13$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 14: Schwann cells

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster14$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster14$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 15: T cells & ILCs 2

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster15$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster15$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 16: Mrc1+-H2-Eb1-low macrophages 3

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster16$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster16$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 17: Mast cells

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster17$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster17$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 18: Vwf+Cldn5+ BECs

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster18$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster18$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = "Siglecf",
        cols = cluster_colors,
        split.by = "SCT_snn_res.0.5")

Cluster 19: Monocytes

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster19$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster19$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 20: Myeloid progenitors

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster20$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster20$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 21: Mature B cells 2

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster21$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster21$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 22: Developing B cells 1

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster22$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster22$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 23: Developing B cells 2

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster23$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster23$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Cluster 24: Doublets BECs / macrophages

# Top 40 markers based on p_val_adj and then avg_log2FC
VlnPlot(mouse.unannotated,
        features = cluster24$gene[1:20],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

VlnPlot(mouse.unannotated,
        features = cluster24$gene[21:40],
        cols = cluster_colors,
        stack = TRUE,
        flip = TRUE,
        split.by = "SCT_snn_res.0.5")

Assign indiviudal cluster names

# Rename all identities
mouse.annotated <- RenameIdents(object = mouse.unannotated, 
                               "0" = "Kdr-high BECs & LECs 1",
                               "1" = "Mrc1+H2-Eb1-low macrophages 1",
                               "2" = "Mrc1+H2-Eb1-low macrophages 2",
                               "3" = "Fibroblasts 1",
                               "4" = "Mrc1+H2-Eb1-high macrophages 1",
                               "5" = "Fibroblasts 2",
                               "6" = "Kdr-high BECs & LECs 2",
                               "7" = "Neutrophils",
                               "8" = "T cells & ILCs 1",
                               "9" = "Dendritic cells",
                               "10" = "Vwf+ BECs & LECs",
                               "11" = "Mrc1+H2-Eb1-high macrophages 2",
                               "12" = "Pericytes & SMCs",
                               "13" = "Mature B cells 1",
                               "14" = "Schwann cells",
                               "15" = "T cells & ILCs 2",
                               "16" = "Mrc1+H2-Eb1-low macrophages 3",
                               "17" = "Mast cells",
                               "18" = "Vwf+Cldn5+ BECs",
                               "19" = "Monocytes",
                               "20" = "Myeloid progenitors",
                               "21" = "Mature B cells 2",
                               "22" = "Developing B cells 1",
                               "23" = "Developing B cells 2",
                               "24" = "Doublets BECs / macrophages"
                               )
mouse.annotated$seurat_clusters <- Idents(mouse.annotated)
mouse.annotated$individual_clusters <- Idents(mouse.annotated)

# reset levels
mouse.annotated$individual_clusters <- 
  factor(mouse.annotated$individual_clusters,
         levels = c("Pericytes & SMCs", 
                    "Vwf+Cldn5+ BECs",
                    "Vwf+ BECs & LECs",
                    "Kdr-high BECs & LECs 1", "Kdr-high BECs & LECs 2",
                    "Doublets BECs / macrophages",
                    "Mrc1+H2-Eb1-low macrophages 1", "Mrc1+H2-Eb1-low macrophages 2", "Mrc1+H2-Eb1-low macrophages 3",
                    "Mrc1+H2-Eb1-high macrophages 1", "Mrc1+H2-Eb1-high macrophages 2",
                    "Dendritic cells",
                    "Monocytes",
                    "Neutrophils",
                    "Mast cells",
                    "Myeloid progenitors",
                    "Fibroblasts 1", "Fibroblasts 2",
                    "Schwann cells",
                    "T cells & ILCs 1", "T cells & ILCs 2",
                    "Developing B cells 1", "Developing B cells 2",
                    "Mature B cells 1", "Mature B cells 2"))
# umap
umap <- DimPlot(object = mouse.annotated, 
                group.by = "individual_clusters",
                reduction = "umap",
                cols = cluster_colors) +
  theme(legend.position = "bottom")
umap

Merge cluster names

# Rename all identities
mouse.merged <- RenameIdents(object = mouse.unannotated, 
                               "0" = "Kdr-high BECs & LECs",
                               "1" = "Mrc1+H2-Eb1-low macrophages",
                               "2" = "Mrc1+H2-Eb1-low macrophages",
                               "3" = "Fibroblasts",
                               "4" = "Mrc1+H2-Eb1-high macrophages",
                               "5" = "Fibroblasts",
                               "6" = "Kdr-high BECs & LECs",
                               "7" = "Neutrophils",
                               "8" = "T cells & ILCs",
                               "9" = "Dendritic cells",
                               "10" = "Vwf+ BECs & LECs",
                               "11" = "Mrc1+H2-Eb1-high macrophages",
                               "12" = "Pericytes & SMCs",
                               "13" = "Mature B cells",
                               "14" = "Schwann cells",
                               "15" = "T cells & ILCs",
                               "16" = "Mrc1+H2-Eb1-low macrophages",
                               "17" = "Mast cells",
                               "18" = "Vwf+Cldn5+ BECs",
                               "19" = "Monocytes",
                               "20" = "Myeloid progenitors",
                               "21" = "Mature B cells",
                               "22" = "Developing B cells",
                               "23" = "Developing B cells",
                               "24" = "Doublets BECs / macrophages"
                               )

# save idents and set levels
mouse.annotated$merged_clusters <- Idents(mouse.merged)
mouse.annotated$merged_clusters <- 
  factor(mouse.annotated$merged_clusters,
         levels = c("Pericytes & SMCs", 
                    "Vwf+Cldn5+ BECs",
                    "Vwf+ BECs & LECs",
                    "Kdr-high BECs & LECs",
                    "Doublets BECs / macrophages",
                    "Mrc1+H2-Eb1-low macrophages",
                    "Mrc1+H2-Eb1-high macrophages",
                    "Dendritic cells",
                    "Monocytes",
                    "Neutrophils",
                    "Mast cells",
                    "Myeloid progenitors",
                    "Fibroblasts",
                    "Schwann cells",
                    "T cells & ILCs",
                    "Developing B cells",
                    "Mature B cells"))
remove(mouse.merged)
# set colors
merged_colors <- c("darkred","firebrick1","darkorange","gold","green",
                   "darkolivegreen2","darkgreen","cyan","cornflowerblue","blue",
                   "darkorchid1","deeppink","plum1","burlywood3","azure3","azure4", "black")

# umap
umap <- DimPlot(object = mouse.annotated, 
        reduction = "umap",
        repel = TRUE,
        group.by = "merged_clusters",
        cols = merged_colors)
umap

Clustering QC after annotation

3D UMAP

embeddings <- mouse.annotated@reductions$umap@cell.embeddings
embeddings <- cbind(embeddings, as.character(mouse.annotated$merged_clusters))
colnames(embeddings)[4] <- "merged_clusters"
embeddings <- as.data.frame(embeddings)

three.dim <- plot_ly(embeddings,
                     x = ~UMAP_1, 
                     y = ~UMAP_2, 
                     z = ~UMAP_3, 
                     color = ~merged_clusters, 
                     colors = merged_colors,
                     size = 1) 
three.dim <- three.dim %>% add_markers() 
three.dim <- three.dim %>% layout(scene = list(xaxis = list(title = 'UMAP_1'), 
                                     yaxis = list(title = 'UMAP_2'), 
                                     zaxis = list(title = 'UMAP_3')))
three.dim

Isoform, sex, age

# Apoe isoform
umap1 <- DimPlot(object = mouse.annotated, 
                 group.by = "merged_clusters",
                 reduction = "umap",
                 split.by = "isoform",
                 repel = TRUE,
                 cols = merged_colors)
umap1

# age
umap2 <- DimPlot(object = mouse.annotated, 
                 group.by = "merged_clusters",
                 reduction = "umap",
                 split.by = "age",
                 repel = TRUE,
                 cols = merged_colors)
umap2

# sex
umap3 <- DimPlot(object = mouse.annotated, 
                 group.by = "merged_clusters",
                 reduction = "umap",
                 split.by = "sex",
                 repel = TRUE,
                 cols = merged_colors)
umap3

# sample
umap4 <- DimPlot(object = mouse.annotated, 
                 group.by = "merged_clusters",
                 reduction = "umap",
                 split.by = "sample",
                 ncol = 2,
                 repel = TRUE,
                 cols = merged_colors)
umap4

# phase
umap5 <- DimPlot(object = mouse.annotated, 
                 group.by = "merged_clusters",
                 reduction = "umap",
                 split.by = "Phase",
                 cols = merged_colors,
                 repel = TRUE)
umap5

# mito.factor
umap6 <- DimPlot(object = mouse.annotated, 
                 group.by = "merged_clusters",
                 reduction = "umap",
                 split.by = "mito.factor",
                 cols = merged_colors,
                 ncol = 2,
                 repel = TRUE)
umap6

Feature plots

# UMAP percent.mt
f1 <- FeaturePlot(mouse.annotated, 
            reduction = "umap", 
            features = "percent.mt")  + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f1

# UMAP nCount
f2 <- FeaturePlot(mouse.annotated, 
            reduction = "umap",
            features = "nCount_RNA") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f2

# UMAP nFeature
f3 <- FeaturePlot(mouse.annotated, 
            reduction = "umap", 
            features = "nFeature_RNA") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f3

# UMAP percent.ribo
f4 <- FeaturePlot(mouse.annotated, 
            reduction = "umap", 
            features = "percent.ribo.protein") + 
  scale_colour_gradientn(colours = c("blue","lightblue","yellow","orange","red"))
f4

Ttr

VlnPlot(mouse.annotated,
        features = "Ttr",
        split.by = "sample",
        group.by = "sample",
        cols = sample_colors)

VlnPlot(mouse.annotated,
        features = "Ttr",
        split.by = "merged_clusters",
        cols = merged_colors)

FeaturePlot(object = mouse.annotated, 
            features = "Ttr")

Percent cells per cluster

# isoform
b1 <- mouse.annotated@meta.data %>%
  group_by(merged_clusters, isoform) %>%
  dplyr::count() %>%
  group_by(merged_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=merged_clusters,y=percent, fill=isoform)) +
  theme_classic() +
  geom_col() +
  scale_fill_manual(values = isoform_colors) +
  ggtitle("Percentage of isoform per cluster") +
  theme(axis.text.x = element_text(angle = 90))
b1

# sex
b2 <- mouse.annotated@meta.data %>%
  group_by(merged_clusters, sex) %>%
  dplyr::count() %>%
  group_by(merged_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=merged_clusters,y=percent, fill=sex)) +
  theme_classic() +
  geom_col() +
  scale_fill_manual(values = sex_colors) +
  ggtitle("Percentage of sex per cluster") +
  theme(axis.text.x = element_text(angle = 90))
b2

# age
b3 <- mouse.annotated@meta.data %>%
  group_by(merged_clusters, age) %>%
  dplyr::count() %>%
  group_by(merged_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=merged_clusters,y=percent, fill=age)) +
  theme_classic() +
  geom_col() +
  scale_fill_manual(values = age_colors) +
  ggtitle("Percentage of age per cluster") +
  theme(axis.text.x = element_text(angle = 90))
b3

# sample
b4 <- mouse.annotated@meta.data %>%
  group_by(merged_clusters, sample) %>%
  dplyr::count() %>%
  group_by(merged_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=merged_clusters,y=percent, fill=sample)) +
  theme_classic() +
  geom_col() +
  scale_fill_manual(values = sample_colors) +
  ggtitle("Percentage of age per cluster") +
  theme(axis.text.x = element_text(angle = 90))

# phase
b5 <- mouse.annotated@meta.data %>%
  group_by(merged_clusters, Phase) %>%
  dplyr::count() %>%
  group_by(merged_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=merged_clusters,y=percent, fill=Phase)) +
  theme_classic() +
  geom_col() +
  ggtitle("Percentage of age per cluster") +
  theme(axis.text.x = element_text(angle = 90))
b5

# mito.factor
b6 <- mouse.annotated@meta.data %>%
  group_by(merged_clusters, mito.factor) %>%
  dplyr::count() %>%
  group_by(merged_clusters) %>%
  dplyr::mutate(percent = 100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=merged_clusters,y=percent, fill=mito.factor)) +
  theme_classic() +
  geom_col() +
  ggtitle("Percentage of age per cluster") +
  theme(axis.text.x = element_text(angle = 90))
b6

Cluster tree

Idents(mouse.annotated) <- mouse.annotated$individual_clusters
mouse.annotated <- BuildClusterTree(object = mouse.annotated,
                                     dims = 1:21,
                                     reorder = FALSE,
                                     reorder.numeric = FALSE)

tree <- mouse.annotated@tools$BuildClusterTree
tree$tip.label <- paste0(tree$tip.label)

p <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = merged_colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
p

Idents(mouse.annotated) <- mouse.annotated$merged_clusters
mouse.annotated <- BuildClusterTree(object = mouse.annotated,
                                     dims = 1:21,
                                     reorder = FALSE,
                                     reorder.numeric = FALSE)

tree <- mouse.annotated@tools$BuildClusterTree
tree$tip.label <- paste0(tree$tip.label)

p <- ggtree::ggtree(tree, aes(x, y)) +
  scale_y_reverse() +
  ggtree::geom_tree() +
  ggtree::theme_tree() +
  ggtree::geom_tiplab(offset = 1) +
  ggtree::geom_tippoint(color = merged_colors[1:length(tree$tip.label)], shape = 16, size = 5) +
  coord_cartesian(clip = 'off') +
  theme(plot.margin = unit(c(0,2.5,0,0), 'cm'))
p

Stacked violins

goi1 <- c("Cd3e","Trbc1","Cd4","Cd8a","Foxp3","Tbx21","Gata3","Thy1",
         "Cd19","Ms4a1","Cd27","Ptprc","Ly6g","Itgam","Aif1","Klrb1")
goi2 <- c("Adgre1","Ms4a3","Ly6c2","Mrc1","Lyz2","Cd74","Cd83","Cd14","H2-Aa",
          "H2-Ab1","Sirpa","Xcr1","Siglech","Itgax","Kit","Mcpt4")
goi3 <- c("Pdgfra","Col1a1","Lum","Pdgfrb","Rgs5","Cspg4","Acta2","Tagln",
          "Pecam1","Cd34","Plvap","Stmn2","Slc38a5","Vwf","Mfsd2a","Cldn5")
goi4 <- c("Prox1","Flt4","Pdpn","Lyve1","Sox10","Mbp","Fgf13","Kcnab2","Tubb3",
          "Slc17a6","Shank2","Erbb4","Park7","Kif5b","Slc4a1","Hmbs")
goi5 <- c("Pecam1","Flt4","Itgam","Mrc1","Cd3e","Gata3","Cd19","Ly6g","Pdgfrb",
          "Kit","Col1a1","Pmp22","Hmbs")
goi6 <- c("Pdgfrb","Rgs5","Pecam1","Vwf","Cldn5","Kdr","Prox1","Flt4","Ptprc",
          "Itgam","Aif1","Mrc1","H2-Eb1","Itgax","Ccr2","Ly6c2","Ly6g","Kit",
          "Mki67","Col1a2","Plp1","Trbc2","Gata3","Il7r","Cd19","Ms4a1")
Idents(mouse.annotated) <- "merged_clusters"

v1 <- VlnPlot(mouse.annotated, 
        features = goi1,
        split.by = 'merged_clusters',
        cols = merged_colors,
        flip = TRUE,
        stack = TRUE) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 90))
v1

v2 <- VlnPlot(mouse.annotated, 
        features = goi2,
        split.by = 'merged_clusters',
        cols = merged_colors,
        flip = TRUE,
        stack = TRUE) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 90))
v2

v3 <- VlnPlot(mouse.annotated, 
        features = goi3,
        split.by = 'merged_clusters',
        cols = merged_colors,
        flip = TRUE,
        stack = TRUE) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 90))
v3

v4 <- VlnPlot(mouse.annotated, 
        features = goi4,
        split.by = 'merged_clusters',
        cols = merged_colors,
        flip = TRUE,
        stack = TRUE) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 90))
v4

v5 <- VlnPlot(mouse.annotated, 
        features = goi5,
        split.by = 'merged_clusters',
        cols = merged_colors,
        flip = TRUE,
        stack = TRUE) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 90))
v5

v6 <- VlnPlot(mouse.annotated, 
        features = goi6,
        split.by = 'merged_clusters',
        cols = merged_colors,
        flip = TRUE,
        stack = TRUE) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 90))
v6

Differential expression

E4 vs E3 within each cluster

# intitialize variables
cell_types <- unique(mouse.annotated$merged_clusters)
isoform.df <- data.frame()

# loop through clusters
for (i in cell_types) {
  print(i)
  cluster <- subset(mouse.annotated, merged_clusters == i)
  Idents(cluster) <- cluster$isoform
  markers <- FindMarkers(object = cluster,
                         ident.1 = "E4",
                         ident.2 = "E3",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  if(nrow(markers) == 0) {
    next
  }
  markers$cluster <- i
  markers$gene <- rownames(markers)
  isoform.df <- rbind(isoform.df, markers)
}

# reformat table
colnames(isoform.df)[c(3,4)] <- c("percent_E4","percent_E3")
rownames(isoform.df) <- 1:nrow(isoform.df)
isoform.df$percent_difference <- abs(isoform.df$percent_E4 - isoform.df$percent_E3)
isoform.df <- isoform.df[,c(6,7,1,5,2,3,4,8)]

# write table
write.table(isoform.df, "../../results/DEGs/E4_vs_E3_DEGs.tsv", 
            sep = "\t", quote = FALSE, row.names = FALSE)

Female vs male within each cluster

cell_types <- unique(mouse.annotated$merged_clusters)
sex.df <- data.frame()

for (i in cell_types) {
  print(i)
  cluster <- subset(mouse.annotated, merged_clusters == i)
  Idents(cluster) <- cluster$sex
  markers <- FindMarkers(object = cluster,
                         ident.1 = "Female",
                         ident.2 = "Male",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  if(nrow(markers) == 0) {
    next
  }
  markers$cluster <- i
  markers$gene <- rownames(markers)
  sex.df <- rbind(sex.df, markers)
}

# reformat table
colnames(sex.df)[c(3,4)] <- c("percent_female","percent_male")
rownames(sex.df) <- 1:nrow(sex.df)
sex.df$percent_difference <- abs(sex.df$percent_female - sex.df$percent_male)
sex.df <- sex.df[,c(6,7,1,5,2,3,4,8)]

# write table
write.table(sex.df, 
            "../../results/DEGs/female_vs_male_DEGs.tsv", sep = "\t",
            quote = FALSE, row.names = FALSE)

14 vs 2 months within each cluster

# initialize variables
cell_types <- unique(mouse.annotated$merged_clusters)
age.df <- data.frame()

# loop through clusters
for (i in cell_types) {
  print(i)
  cluster <- subset(mouse.annotated, merged_clusters == i)
  Idents(cluster) <- cluster$age
  markers <- FindMarkers(object = cluster,
                         ident.1 = "14 months",
                         ident.2 = "2 months",
                         only.pos = FALSE, # default
                         min.pct = 0.10,  # default
                         test.use = "MAST",
                         verbose = TRUE,
                         assay = "RNA")
  if(nrow(markers) == 0) {
    next
  }
  markers$cluster <- i
  markers$gene <- rownames(markers)
  age.df <- rbind(age.df, markers)
}

# reformat table
colnames(age.df)[c(3,4)] <- c("percent_14mo","percent_2mo")
rownames(age.df) <- 1:nrow(age.df)
age.df$percent_difference <- abs(age.df$percent_14mo - age.df$percent_2mo)
age.df <- age.df[,c(6,7,1,5,2,3,4,8)]

# write table
write.table(age.df, 
            "../../results/DEGs/14_vs_2_months_DEGs.tsv", sep = "\t",
            quote = FALSE, row.names = FALSE)

DEG tables and plots

Compare DEGs

# read tables
isoform.df <- read.table(
  "../../results/DEGs/E4_vs_E3_DEGs.tsv",
  sep = "\t", header = TRUE)
age.df <- read.table(
  "../../results/DEGs/14_vs_2_months_DEGs.tsv",
  sep = "\t", header = TRUE)
sex.df <- read.table(
  "../../results/DEGs/female_vs_male_DEGs.tsv",
  sep = "\t", header = TRUE)

# filter
isoform.df <- isoform.df[isoform.df$p_val_adj < 0.05,]
age.df <- age.df[age.df$p_val_adj < 0.05,]
sex.df <- sex.df[sex.df$p_val_adj < 0.05,]

# add columns
direction <- isoform.df$avg_log2FC > 0
direction <- gsub(TRUE, "isoform_up", direction)
direction <- gsub(FALSE, "isoform_down", direction)
isoform.df$direction <- direction
direction <- age.df$avg_log2FC > 0
direction <- gsub(TRUE, "age_up", direction)
direction <- gsub(FALSE, "age_down", direction)
age.df$direction <- direction
direction <- sex.df$avg_log2FC > 0
direction <- gsub(TRUE, "sex_up", direction)
direction <- gsub(FALSE, "sex_down", direction)
sex.df$direction <- direction

# reformat tables
isoform.df2 <- isoform.df %>%
  dplyr::count(cluster,direction) %>%
  tidyr::spread(cluster, n)
age.df2 <- age.df %>%
  dplyr::count(cluster,direction) %>%
  tidyr::spread(cluster, n)
sex.df2 <- sex.df %>%
  dplyr::count(cluster,direction) %>%
  tidyr::spread(cluster, n)

# master table
df <- smartbind(isoform.df2, sex.df2, age.df2)
df

Upset plot

# read tables
isoform.df <- read.table("../../results/DEGs/E4_vs_E3_DEGs.tsv",
                         sep = "\t", header = TRUE)
age.df <- read.table("../../results/DEGs/14_vs_2_months_DEGs.tsv",
                         sep = "\t", header = TRUE)
sex.df <- read.table("../../results/DEGs/female_vs_male_DEGs.tsv",
                         sep = "\t", header = TRUE)

# filter tables
isoform.df <- isoform.df[isoform.df$p_val_adj < 0.05,]
age.df <- age.df[age.df$p_val_adj < 0.05,]
sex.df <- sex.df[sex.df$p_val_adj < 0.05,]

clusters <- unique(mouse.annotated$merged_clusters)
for (i in clusters) {
  # Subset df by cluster
  isoform <- subset(isoform.df, isoform.df$cluster == i)
  sex <- subset(sex.df, sex.df$cluster == i)
  age <- subset(age.df, age.df$cluster == i)
  
  # Subset lists
  isoform_up <- subset(isoform$gene, isoform$avg_log2FC > 0)
  isoform_down <- subset(isoform$gene, isoform$avg_log2FC < 0)
  sex_up <- subset(sex$gene, sex$avg_log2FC > 0)
  sex_down <- subset(sex$gene, sex$avg_log2FC < 0)
  age_up <- subset(age$gene, age$avg_log2FC > 0)
  age_down <- subset(age$gene, age$avg_log2FC < 0)
  list_input <- list("Age Up-regulated" = age_up,
                     "Isoform Up-regulated" = isoform_up,
                     "Sex Up-regulated" = sex_up,
                     "Age Down-regulated" = age_down,
                     "Isoform Down-regulated" = isoform_down,
                     "Sex Down-regulated" = sex_down)
  data <- fromList(list_input)
  
  # store names
  names <- c("Sex Down-regulated","Isoform Down-regulated","Age Down-regulated",
             "Sex Up-regulated","Isoform Up-regulated","Age Up-regulated")
  
  # plot
  upset_gene <- ComplexUpset::upset(data, 
                      names,
                      set_sizes=(
                        upset_set_size()
                        + geom_text(aes(label=..count..), hjust=1.1, stat='count')
                        + expand_limits(y=500)),
                      queries = list(upset_query("Age Up-regulated", fill = "red"),
                                     upset_query("Isoform Up-regulated", fill = "red"),
                                     upset_query("Sex Up-regulated", fill = "red"),
                                     upset_query("Age Down-regulated", fill = "blue"),
                                     upset_query("Isoform Down-regulated", fill = "blue"),
                                     upset_query("Sex Down-regulated", fill = "blue")),
                      base_annotations = list('Intersection size' = (
                        intersection_size(bar_number_threshold=1, width=0.5)
                        + scale_y_continuous(expand=expansion(mult=c(0, 0.05)),limits = c(0,400)) # space on top
                        + theme(
                              # hide grid lines
                              panel.grid.major=element_blank(),
                              panel.grid.minor=element_blank(),
                              # show axis lines
                              axis.line=element_line(colour='black')))),
                      stripes = upset_stripes(
                        geom=geom_segment(size=12),  # make the stripes larger
                        colors=c('grey95', 'white')),
                      # to prevent connectors from getting the colorured
                      # use `fill` instead of `color`, together with `shape='circle filled'`
                      matrix = intersection_matrix(
                        geom=geom_point(
                          shape='circle filled',
                          size=3,
                          stroke=0.45)),
                      sort_sets=FALSE,
                      sort_intersections='descending'
                    )
  upset_gene <- upset_gene + ggtitle(paste0(i,", adj_p_val < 0.05"))
  i <- gsub(" ","_",i)
  i <- gsub("/","_",i)
  i <- gsub("-","_",i)
  pdf(paste0("../../results/upset/upset_",tolower(i),".pdf"), height = 6, width = 8)
  print(upset_gene)
  dev.off()
}

Volcano

variables <- c("sex","age","isoform")
all_clusters <- unique(mouse.annotated$merged_clusters)

for (i in variables) {
  
  # read DEG file
  if (i == "sex") {
    treatment_vs_control <- 
      read.delim("../../results/DEGs/female_vs_male_DEGs.tsv",
                 sep = "\t")
  } else if (i == "age") {
    treatment_vs_control <-
      read.delim("../../results/DEGs/14_vs_2_months_DEGs.tsv",
                 sep = "\t")
  } else {
    treatment_vs_control <-
      read.delim("../../results/DEGs/E4_vs_E3_DEGs.tsv",
                 sep = "\t")
  }
  
  # assign colors
  color_values <- vector()
  max <- nrow(treatment_vs_control)
  for(row in 1:max){
    if (treatment_vs_control$p_val_adj[row] < 0.05){
      if (treatment_vs_control$avg_log2FC [row] > 0){
        color_values <- c(color_values, 1) # 1 when logFC > 0 and FDRq < 0.05
      }
      else if (treatment_vs_control$avg_log2FC[row] < 0){
        color_values <- c(color_values, 2) # 2 when logFC < 0 and FDRq < 0.05
      }
    }
    else{
      color_values <- c(color_values, 3) # 3 when FDRq >= 0.05
    }
  }
  treatment_vs_control$color_adjpval_0.05 <- factor(color_values)
  
  # loop through clusters
  for (j in all_clusters) {
    
    # subset cluster
    data <- subset(treatment_vs_control, cluster == j)
    
    # plot only if there are DEGs with p_val_adj < 0.05
    num <- subset(data, p_val_adj < 0.05)
    num <- nrow(num)
    if(num != 0) {
        
      # subset genes to label
      up <- data[data$color_adjpval_0.05 == 1,]
      up10 <- up[1:10,]
      down <- data[data$color_adjpval_0.05 == 2,]
      down10 <- down[1:10,]
      
      # set manual colors
      if (!1 %in% unique(data$color_adjpval_0.05)) {
        my_colors <- c("blue","gray")
      } else if (!2 %in% unique(data$color_adjpval_0.05)) {
        my_colors <- c("red","gray")
      } else if (!1 %in% unique(data$color_adjpval_0.05) && !2 %in% unique(data$color_adjpval_0.05)) {
        my_colors <- c("gray")
      } else {
        my_colors <- c("red","blue","gray")
      }
      
      # set significance threshold
      hadjpval <- (-log10(max(
        data$p_val[data$p_val_adj < 0.05], 
        na.rm=TRUE)))

      # plot
      p <-
        ggplot(data = data, 
               aes(x = avg_log2FC,  # x-axis is logFC
                   y = -log10(p_val),  # y-axis will be -log10 of P.Value
                   color = color_adjpval_0.05)) +  # color is based on factored color column
        geom_point(alpha = 0.8, size = 2) +  # create scatterplot, alpha makes points transparent
        theme_bw() +  # set color theme
        theme(legend.position = "none") +  # no legend
        scale_color_manual(values = my_colors) +  # set factor colors
        labs(
          title = "", # no main title
          x = expression(log[2](FC)), # x-axis title
          y = expression(-log[10] ~ "(" ~ italic("p") ~ "-value)") # y-axis title
        ) +
        theme(axis.title.x = element_text(size = 10),
              axis.text.x = element_text(size = 10)) +
        theme(axis.title.y = element_text(size = 10),
              axis.text.y = element_text(size = 10)) +
        geom_hline(yintercept = hadjpval,  #  horizontal line
                           colour = "#000000",
                           linetype = "dashed") +
        ggtitle(paste0(j,"\n",i,", p_val_adj < 0.05")) +
        geom_text_repel(data = up10,
                        aes(x = avg_log2FC, y= -log10(p_val), label = gene), 
                        color = "maroon", 
                        fontface="italic",
                        max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                        ) +
        geom_text_repel(data = down10,
                        aes(x = avg_log2FC, y= -log10(p_val), label = gene), 
                        color = "navyblue", 
                        fontface="italic",
                        max.overlaps = getOption("ggrepel.max.overlaps", default = 30)
                        )
      p
      
      # save
      clus <- j
      clus <- gsub(" ","_",clus)
      clus <- gsub("/","_",clus)
      clus <- gsub("-","_",clus)
      if (i == "sex") {
        path <- paste0("../../results/volcano/sex/",
                       tolower(clus),"_sex_volcano")
      } else if (i == "age") {
        path <- paste0("../../results/volcano/age/",
                       tolower(clus),"_age_volcano")      
      } else {
        path <- paste0("../../results/volcano/isoform/",
                       tolower(clus),"_isoform_volcano")      
      }
      pdf(paste(path,".pdf"), height = 8, width = 8)
      print(p)
      dev.off()
      
      print(paste("i =",i,", j =",j))
      
    }
  } # end loop through clusters
} # end loop through variables

Pseudotime

  • https://www.nature.com/articles/s41467-020-14766-3
  • Colonna B cell paper
    • Single-cell pseudotime trajectories were inferred using diffusion map algorithms implemented through the R package destiny (43). Normalized expression values were used as input for the generation of diffusion maps. Cells were ordered based on the first diffusion component. To further visualize B lineage differentiation, pseudotime was inferred using the slingshot R package (44). Gene Ontology analysis for biological processes enriched in different B cell populations was performed on the top 50 transcripts in each cluster using Metascape (https://metascape.org) (45). Receptor-ligand interactions were mined from the scRNA-seq data using the NicheNet algorithm (46). In brief, NicheNet analysis was performed using the marker genes for all clusters of dura B cell and dura fibroblast using default settings. After calculation of interaction scores between possible receptor-ligand combinations, pairs were filtered for those bona fide interactions that were documented in the literature and publicly available databases. ## Slingshot
palette(brewer.pal(8, "Dark2"))
data(countMatrix, package = "tradeSeq")
counts <- countMatrix
rm(countMatrix)
data(crv, package = "tradeSeq")
data(celltype, package = "tradeSeq")



# Fit developmental trajectories
set.seed(200)
rd <- reducedDims(crv)
cl <- kmeans(rd, centers = 7)$cluster
plot(rd, col = brewer.pal(9, "Set1")[cl], pch = 16, asp = 1,
     cex = 2/3)

embeddings <- as.data.frame(mouse.annotated@reductions$umap@cell.embeddings)[,c(1,2)]
cluster <- mouse.annotated$merged_clusters
plot(embeddings, 
     col = brewer.pal(11, "Set1")[cl], 
     pch = 16, 
     asp = 1,
     cex = 2/3)

Fit trajectories using slingshot

Fit negative binomial model

Within lineage comparison

  • Discovering progenitor markers genes
    • In order to discover marker genes of the progenitor or differentiated cell population, researchers may be interested in assessing differential expression between the progenitor cell population (i.e., the starting point of a lineage) with the differentiated cell type population (i.e., the end point of a lineage).
    • The function startVsEndTest uses a Wald test to assess the null hypothesis that the average expression at the starting point of the smoother (progenitor population) is equal to the average expression at the end point of the smoother (differentiated population).
    • The test basically involves a comparison between two smoother coefficients for every lineage.
    • The function startVsEndTest performs a global test across all lineages by default (i.e. it compares the start and end positions for all lineages simultaneously), but you can also assess all lineages separately by setting lineages=TRUE. Below, we adopt an omnibus test across the two lineages.

Between lineage comparison

Discover gene w/ different expression patterns

Shiny App

Cleanup object

mouse.annotated@assays$RNA@var.features <- 
  mouse.annotated@assays$SCT@var.features
metadata <- mouse.annotated@meta.data
metadata <- metadata[,c(29,30,2:14,16:26)]
mouse.annotated@meta.data <- metadata
mouse.annotated@assays$SCT@meta.features <- metadata
mouse.annotated@assays$RNA@meta.features <- metadata

Output directory

# make shiny folder
DefaultAssay(mouse.annotated) <- "RNA"
Idents(mouse.annotated) <- mouse.annotated$merged_clusters
sc.config <- createConfig(mouse.annotated)
makeShinyApp(mouse.annotated, sc.config, gene.mapping = TRUE,
             shiny.title = "Mouse Single Cell")